rm(list=ls())  # clean up workspace
setwd("/Users/xji3/GitFolders/YeastIGCTract/SimulationStudy/")

Tract.list <- c(3.0, 10.0, 50.0, 100.0, 200.0, 300.0, 400.0, 500.0)
# Now read in actual mean tract length in each simulated dataset
for (tract in Tract.list){
  sim.tract <- NULL
  for(sim in 1:100){
    pos.list <- matrix(0, 492, 1)
    sim_log <- paste("./Tract_", toString(tract), ".0_HKY/sim_", toString(sim), 
                     "/YDR418W_YEL054C_sim_", toString(sim), "_IGC.log", sep = "")
    # now read in log file
    log_info <- read.table(sim_log, header = TRUE)
    performed.tract.length <- log_info[, "stop_pos"] - log_info[, "start_pos"] + 1
    proposed.tract.length <- log_info[, "tract_length"]
    diff.tracts <- log_info[, "num_diff"] > 0
    new.info <- matrix(c(dim(log_info)[1], sum(log_info[, "orlg_from"] == 1), sum(log_info[, "orlg_from"] == 2),
                         mean(proposed.tract.length), sd(proposed.tract.length), 
                         mean(performed.tract.length), sd(performed.tract.length),
                         mean(proposed.tract.length[diff.tracts]), 
                         mean(performed.tract.length[diff.tracts])), 9, 1)
    rownames(new.info) <- c("num IGC", "num 1->2 IGC", "num 2->1 IGC",
                            "mean proposed tract length", "sd proposed tract length", 
                            "mean performed tract length", "sd performed tract length", 
                            "mean proposed nonidentical tract length", "mean performed nonidentical tract length")
    colnames(new.info) <- paste("sim_", toString(sim), sep = "")
    
    # Now add number of IGC events of the 492 sites
    for(i in 1:dim(log_info)[1]){
      pos_from <- log_info[i, "start_pos"] + 1
      pos_to   <- log_info[i, "stop_pos"] + 1
      pos.list[pos_from:pos_to] <- pos.list[pos_from:pos_to] + 1
    }
    rownames(pos.list) <- paste("site_", 1:492, sep = "")
    colnames(pos.list) <- paste("sim_", toString(sim), sep = "")
    sim.tract <- cbind(sim.tract, rbind(new.info, pos.list))
  }
  assign(paste("sim.tract.", toString(tract), sep = ""), sim.tract)
}

Total post duplication 0.056233707053073859. Tau = 22.58153.

Tau = 22.58153
blen = 0.056233707053073859
for(tract in Tract.list){
  sim.summary <- get(paste("sim.tract.", toString(tract), sep = ""))
  
  
  plot(sim.summary["num 1->2 IGC", ] / sim.summary["num IGC", ], 
       main = paste("% 1->2 IGC events, Tract = ", toString(tract), ""))
  abline(h = 0.5, col = 2)
  
  x = 0:11 * 0.1
  n <- Tau / tract * (tract + 491) * 2 * blen
  hist(sim.summary["num 1->2 IGC", ] / sim.summary["num IGC", ], prob=TRUE, breaks = x - 0.05,
        main = paste("% 1->2 IGC Histogram, Tract = ", toString(tract), sep = ""))
  lines(x, dnorm(x, 0.5, 0.5/sqrt(n)), col = 2)
  
  plot(rowMeans(sim.summary[10:501, ]), 
       main = paste("Mean Experienced # IGC across sites, Tract = ", toString(tract), ""))
  abline(h = 22.58153 * 2 * 0.056233707053073859, col = 2)

  plot(rowMeans(sim.summary[10:501, ]^2) - rowMeans(sim.summary[10:501, ])^2, 
       main = paste("Var Experienced # IGC across sites, Tract = ", toString(tract), ""))
  abline(h = 22.58153 * 2 * 0.056233707053073859, col = 2)
  
  # All sites
  x =  0:(max(sim.summary[10:501,]) + 1)
  hist(sim.summary[10:501,], prob=TRUE, breaks = x - 0.5,
        main = paste("Histogram, Tract = ", toString(tract), sep = ""))
  lines(x, dpois(x, Tau * 2 * blen), col = 2)  
  
  # First site
  x =  0:(max(sim.summary[10,]) + 1)
  hist(sim.summary[10,], prob=TRUE, breaks = x - 0.5,
        main = paste("Histogram of 1st site, Tract = ", toString(tract), sep = ""))
  lines(x, dpois(x, Tau * 2 * blen), col = 2)  

  # Site 200
  x =  0:(max(sim.summary[209,]) + 1)
  hist(sim.summary[209,], prob=TRUE, breaks = x - 0.5,
        main = paste("Histogram of 200th site, Tract = ", toString(tract), sep = ""))
  lines(x, dpois(x, Tau * 2 * blen), col = 2)  

  # Site 400
  x =  0:(max(sim.summary[409,]) + 1)
  hist(sim.summary[409,], prob=TRUE, breaks = x - 0.5,
        main = paste("Histogram of 400th site, Tract = ", toString(tract), sep = ""))
  lines(x, dpois(x, Tau * 2 * blen), col = 2) 
  #ks.test(sim.summary[10:501,], "ppois", Tau*2*blen)
}